# -*- coding: utf-8 -*-

def docid2path(docid):
    """Map a document ID to its category and relative file path.

    IDs whose first three characters are ``"cmn"`` are treated as web
    documents; every other ID is treated as a news document.

    Web IDs are split on ``'-'``. The third field (left-padded with a
    single ``'0'`` unless it is already exactly two characters long) names
    the first sub-directory, and the third and fourth fields joined by
    ``'-'`` name the second one. The file name is the ID plus ``'.sgm'``.

    News IDs use the lower-cased first 7 and first 14 characters of the ID
    as the two sub-directories, and the unmodified ID as the file name,
    e.g. ``"XIN_CMN_20100512.0336"`` ->
    ``"news/xin_cmn/xin_cmn_201005/XIN_CMN_20100512.0336"``.

    Returns a ``(category, relative_path)`` tuple where ``category`` is
    ``'web'`` or ``'news'``.

    Raises ``IndexError`` if a web ID has fewer than four '-'-separated
    fields.
    """
    if docid[:3] != "cmn":
        # News layout: both directory levels are lower-cased ID prefixes.
        prefix7 = docid[:7].lower()
        prefix14 = docid[:14].lower()
        return 'news', '/'.join(("news", prefix7, prefix14, docid))

    # Web layout: directory names come from the '-'-separated fields.
    fields = docid.split('-')
    group = fields[2]
    if len(group) == 2:
        padded_group = group
    else:
        padded_group = '0' + group
    subdir = group + '-' + fields[3]
    return 'web', '/'.join(("web", padded_group, subdir, docid + '.sgm'))

def get_plain_path(data_path, relative_path):
    """Return the path of ``relative_path`` under ``data_path``'s ``doc`` directory.

    The result is ``data_path``, the literal ``'/doc/'`` and
    ``relative_path`` concatenated as-is; no slashes are normalised.
    """
    pieces = (data_path, relative_path)
    return '/doc/'.join(pieces)

def get_postag_path(data_path, relative_path):
    """Return the path of ``relative_path`` under ``data_path``'s ``doc-pro`` directory.

    The result is ``data_path``, the literal ``'/doc-pro/'`` and
    ``relative_path`` concatenated as-is; no slashes are normalised.
    Judging by the function name, ``doc-pro`` presumably holds POS-tagged
    documents -- confirm against the data layout.
    """
    pieces = (data_path, relative_path)
    return '/doc-pro/'.join(pieces)

if __name__ == "__main__":
    # Manual smoke check: print the (category, path) pair for a sample news ID.
    sample_docid = "XIN_CMN_20100512.0336"
    category_and_path = docid2path(sample_docid)
    print(category_and_path)
